# author: Han Zhang
library(ggplot2)
library(reshape2)


# Placebo / keyword-search monthly counts. The columns month, count_all,
# Dataset, quarter and year are selected from this table further down.
dp <- read.csv("keyword_search_placebo_count.csv", sep = "\t",
               stringsAsFactors = FALSE)

# Protest events; event_id is split on "_" and the second part is parsed as
# the event date.
df <- read.csv("../../output/protest_events.csv", sep = "\t",
               stringsAsFactors = FALSE)
ddt <- strsplit(df$event_id, "_", fixed = TRUE)
# Fail loudly instead of letting matrix() silently misalign the columns when
# an id does not have exactly two "_"-separated parts.
stopifnot(all(lengths(ddt) == 2L))
ddt <- matrix(unlist(ddt), ncol = 2, byrow = TRUE)
df$date <- as.Date(ddt[, 2])
df$month <- format(df$date, "%Y-%m")

# Number of events per calendar month ("YYYY-MM"). `month` is a factor here
# because it comes from as.data.frame() applied to a table.
df_month_count <- as.data.frame(table(df$month))
colnames(df_month_count) <- c("month", "count_all")
df_month_count <- df_month_count[df_month_count$month != "2017-07", ]
df_month_count$Dataset <- "CASM"
# Despite its name, `quarter` holds the two-digit month number (characters
# 6-7 of "YYYY-MM"); it is turned into a tick label later in the script.
df_month_count$quarter <- as.numeric(substr(df_month_count$month, 6, 8))
df_month_count$year <- substr(df_month_count$month, 1, 4)

# Stack the event counts on top of the placebo / keyword counts.
dd <- rbind(
  df_month_count,
  dp[, c("month", "count_all", "Dataset", "quarter", "year")]
)

# Recompute the month number for every row so both sources use the same
# definition, then keep 2010 onwards (string comparison on the year).
dd$quarter <- as.numeric(substr(dd$month, 6, 8))
dd <- dd[dd$year > "2009", ]
# Tick label for a month on the plot's x axis.
#
# x: month number (1-12). y: dataset label. Both may be vectors; y is
#    recycled to the length of x.
# Returns a character vector the same length as x: "Jan", "Apr", "Jul" or
# "Oct" where x is the first month of a quarter and y is "CASM", and ""
# everywhere else (other months, other datasets, NA or out-of-range input).
quar <- function(x, y) {
  quarter_starts <- c(1, 4, 7, 10)
  # match() yields NA for anything that is not a quarter-start month, which
  # in turn yields an NA label that is blanked out below.
  labels <- month.abb[quarter_starts][match(x, quarter_starts)]
  y <- rep_len(y, length(x))
  labels[is.na(labels) | is.na(y) | y != "CASM"] <- ""
  labels
}

# Replace the numeric month in `quarter` with its tick label ("Jan", "Apr",
# "Jul", "Oct" on CASM rows, "" everywhere else).
dd$quarter <- mapply(quar, dd$quarter, dd$Dataset)

# Drop the excluded month before any x positions are derived.
dd <- dd[dd$month != "2017-07", ]

# x position = rank of the month among the months actually present.
# Rebuilding the factor from the character values sorts the levels
# chronologically and discards stale levels that dd$month may still carry,
# so positions always run 1..n in calendar order.
dd$month_numeric <- as.numeric(factor(as.character(dd$month)))

# Tick labels and the x positions they belong to, both taken from the CASM
# rows so that every label sits under its own month.
casm_rows <- dd$Dataset == "CASM"
quarter_label <- dd$quarter[casm_rows]
quarter_x <- dd$month_numeric[casm_rows]

# One label per year, centred on the span of x positions that year covers.
year_x <- tapply(dd$month_numeric, dd$year, function(v) mean(range(v)))

# Human-readable legend entries.
dataset_labels <- c(
  "CASM" = "CASM-China events",
  "Posts with protest-irrelevant words" = "Posts with idiom unrelated to protest",
  "Posts with 50 keywords (in hundreds)" = "Posts with 50 keywords (hundreds)"
)
needs_relabel <- dd$Dataset %in% names(dataset_labels)
dd$Dataset[needs_relabel] <- unname(dataset_labels[dd$Dataset[needs_relabel]])

cairo_pdf("CASM_keyword_irrelevant_count.pdf", width = 12, height = 8)

g1 <- ggplot(dd, aes(x = month_numeric,
                     y = count_all,
                     group = Dataset, shape = Dataset)) +
  # geom_point() +
  geom_line(aes(linetype = Dataset)) +
  scale_shape_discrete() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) +
  # The built-in x axis is hidden; month and year labels are drawn by hand
  # below the panel, so the bottom margin is enlarged to make room for them.
  theme(plot.margin = unit(c(1, 1, 4, 1), "lines"),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank()) +
  theme(axis.text.y = element_text(size = 18, colour = "black"),
        axis.title.y = element_text(size = 20, colour = "black"),
        legend.text = element_text(size = 18, colour = "black"),
        legend.title = element_text(size = 20, colour = "black")) +
  # NOTE(review): recent ggplot2 releases prefer legend.position = "inside"
  # plus legend.position.inside for numeric positions; confirm against the
  # installed version before changing.
  theme(legend.position = c(0.78, 0.91)) +
  ylab("Monthly Count") +
  xlab("Month") +
  coord_cartesian(ylim = c(-250, 5000), expand = TRUE) +
  annotate(geom = "text", x = quarter_x, y = -150,
           label = quarter_label, size = 5) +
  annotate(geom = "text", x = as.vector(year_x), y = -300,
           label = names(year_x), size = 6)

# Turn off panel clipping so the hand-drawn labels below the y limit show.
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid::grid.draw(g2)

dev.off()





